#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
# Positional arguments (all optional):
#   $1 -> ACTOR_MODEL_PATH   (no default; empty when omitted)
#   $2 -> CRITIC_MODEL_PATH  (no default; empty when omitted)
#   $3 -> ACTOR_ZERO_STAGE   (defaults to 0 when omitted or empty)
#   $4 -> CRITIC_ZERO_STAGE  (defaults to 0 when omitted or empty)
#   $5 -> OUTPUT             (defaults to ./output when omitted or empty)
ACTOR_MODEL_PATH=$1
CRITIC_MODEL_PATH=$2
# ${n:-default} substitutes the default for both unset and empty values,
# matching the previous `if [ "$VAR" == "" ]` checks.
ACTOR_ZERO_STAGE=${3:-0}
CRITIC_ZERO_STAGE=${4:-0}
OUTPUT=${5:-./output}
# Quote the path so a directory name containing spaces or glob characters is
# created as a single directory; `--` keeps a leading '-' from being parsed as
# an option.
mkdir -p -- "$OUTPUT"
   
# Run 1 of 4: launch main.py through the deepspeed launcher on a single GPU.
# Actor checkpoint: the step1_supervised_finetuning output directory.
# Critic checkpoint: the step2_reward_model_finetuning output directory.
# Both model paths are pinned to this machine; they are not taken from $1/$2.
#
# The zero stages and the output directory follow ACTOR_ZERO_STAGE,
# CRITIC_ZERO_STAGE and OUTPUT (set at the top of the script), falling back to
# the previously hard-coded 0 / 0 / ./output when those are empty. Previously a
# custom output directory was created by mkdir but ignored here, so the
# redirect to ./output/training.log could fail on a missing directory and the
# run would never start.
#
# NOTE: `&>` truncates the log file, and the other runs in this script log to
# the same path, so only the most recent run's output is kept.
run1_args=(
  --actor_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/training/step1_supervised_finetuning/output
  --critic_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/training/step2_reward_model_finetuning/output
  --actor_zero_stage "${ACTOR_ZERO_STAGE:-0}"
  --critic_zero_stage "${CRITIC_ZERO_STAGE:-0}"
  --max_answer_seq_len 60
  --num_padding_at_beginning 1
  --gradient_accumulation_steps 2
  --deepspeed
  --actor_lora_dim 128
  --enable_hybrid_engine
  --actor_gradient_checkpointing
  --actor_dropout 0.0
  --output_dir "${OUTPUT:-./output}"
)
# stdout and stderr both go to the log file inside the output directory.
deepspeed --num_gpus 1 main.py "${run1_args[@]}" &> "${OUTPUT:-./output}/training.log"

# Run 2 of 4: same launcher setup (single GPU, main.py), but on the IMDBreview
# data path.
# Actor checkpoint: step1_supervised_finetuning/imdb_facebook_opt-1.3b_seed-1.
# Critic checkpoint: the DeepSpeed-Chat/tests/output directory.
# Both model paths are pinned to this machine; they are not taken from $1/$2.
#
# The zero stages and the output directory follow ACTOR_ZERO_STAGE,
# CRITIC_ZERO_STAGE and OUTPUT (set at the top of the script), falling back to
# the previously hard-coded 0 / 0 / ./output when those are empty. Previously a
# custom output directory was created by mkdir but ignored here, so the
# redirect to ./output/training.log could fail on a missing directory and the
# run would never start.
#
# NOTE: `&>` truncates the log file, and the other runs in this script log to
# the same path, so only the most recent run's output is kept.
run2_args=(
  --actor_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/training/step1_supervised_finetuning/imdb_facebook_opt-1.3b_seed-1
  --critic_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/tests/output
  --data_path IMDBreview
  --max_answer_seq_len 60
  --actor_zero_stage "${ACTOR_ZERO_STAGE:-0}"
  --critic_zero_stage "${CRITIC_ZERO_STAGE:-0}"
  --num_padding_at_beginning 1
  --gradient_accumulation_steps 2
  --deepspeed
  --actor_lora_dim 128
  --enable_hybrid_engine
  --actor_gradient_checkpointing
  --actor_dropout 0.0
  --output_dir "${OUTPUT:-./output}"
)
# stdout and stderr both go to the log file inside the output directory.
deepspeed --num_gpus 1 main.py "${run2_args[@]}" &> "${OUTPUT:-./output}/training.log"

# deepspeed --num_gpus 1 main.py \
#    --actor_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/training/step1_supervised_finetuning/output --critic_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/training/step2_reward_model_finetuning/output \
#    --actor_zero_stage 0 --critic_zero_stage 0 \
#    --max_answer_seq_len 256 \
#    --data_split 2,6,2 \
#    --num_padding_at_beginning 1 --gradient_accumulation_steps 2 \
#    --deepspeed --enable_hybrid_engine --actor_gradient_checkpointing --actor_dropout 0.0 \
#    --output_dir ./output &> ./output/training.log
# The commented-out run above (max_answer_seq_len 256, no --actor_lora_dim) runs out of memory (OOM)!

# Run 3 of 4: step1/step2 checkpoints again, now with --data_split 2,6,2 and
# --max_answer_seq_len 128. The commented-out variant earlier in this script
# (256, without --actor_lora_dim) was noted to run out of memory.
# Actor checkpoint: the step1_supervised_finetuning output directory.
# Critic checkpoint: the step2_reward_model_finetuning output directory.
# Both model paths are pinned to this machine; they are not taken from $1/$2.
#
# The zero stages and the output directory follow ACTOR_ZERO_STAGE,
# CRITIC_ZERO_STAGE and OUTPUT (set at the top of the script), falling back to
# the previously hard-coded 0 / 0 / ./output when those are empty. Previously a
# custom output directory was created by mkdir but ignored here, so the
# redirect to ./output/training.log could fail on a missing directory and the
# run would never start.
#
# NOTE: `&>` truncates the log file, and the other runs in this script log to
# the same path, so only the most recent run's output is kept.
run3_args=(
  --actor_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/training/step1_supervised_finetuning/output
  --critic_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/training/step2_reward_model_finetuning/output
  --actor_zero_stage "${ACTOR_ZERO_STAGE:-0}"
  --critic_zero_stage "${CRITIC_ZERO_STAGE:-0}"
  --max_answer_seq_len 128
  --data_split 2,6,2
  --num_padding_at_beginning 1
  --gradient_accumulation_steps 2
  --deepspeed
  --actor_lora_dim 128
  --enable_hybrid_engine
  --actor_gradient_checkpointing
  --actor_dropout 0.0
  --output_dir "${OUTPUT:-./output}"
)
# stdout and stderr both go to the log file inside the output directory.
deepspeed --num_gpus 1 main.py "${run3_args[@]}" &> "${OUTPUT:-./output}/training.log"

# Run 4 of 4: IMDBreview data path with --data_split 2,6,2.
# Actor checkpoint: step1_supervised_finetuning/imdb_facebook_opt-1.3b_seed-1.
# Critic checkpoint: the DeepSpeed-Chat/tests/output directory.
# Both model paths are pinned to this machine; they are not taken from $1/$2.
#
# The zero stages and the output directory follow ACTOR_ZERO_STAGE,
# CRITIC_ZERO_STAGE and OUTPUT (set at the top of the script), falling back to
# the previously hard-coded 0 / 0 / ./output when those are empty. Previously a
# custom output directory was created by mkdir but ignored here, so the
# redirect to ./output/training.log could fail on a missing directory and the
# run would never start.
#
# NOTE: `&>` truncates the log file, and the other runs in this script log to
# the same path, so only the most recent run's output is kept.
run4_args=(
  --actor_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/training/step1_supervised_finetuning/imdb_facebook_opt-1.3b_seed-1
  --critic_model_name_or_path /root/autodl-tmp/fcrlhf/DeepSpeed-Chat/tests/output
  --data_path IMDBreview
  --max_answer_seq_len 60
  --data_split 2,6,2
  --actor_zero_stage "${ACTOR_ZERO_STAGE:-0}"
  --critic_zero_stage "${CRITIC_ZERO_STAGE:-0}"
  --num_padding_at_beginning 1
  --gradient_accumulation_steps 2
  --deepspeed
  --actor_lora_dim 128
  --enable_hybrid_engine
  --actor_gradient_checkpointing
  --actor_dropout 0.0
  --output_dir "${OUTPUT:-./output}"
)
# stdout and stderr both go to the log file inside the output directory.
deepspeed --num_gpus 1 main.py "${run4_args[@]}" &> "${OUTPUT:-./output}/training.log"